#skim(force(capacites)) # n'amène rien sur une time series au format long
summary(force(capacites))
## dep pop_dpt hbgt_collectif hbgt_locatif
## Length:13 Min. : 75949 Min. : 0 Min. : 2972
## Class1:glue 1st Qu.: 189824 1st Qu.: 4288 1st Qu.:10569
## Class2:character Median : 278217 Median : 6727 Median :13392
## Mode :character Mean : 451357 Mean :12132 Mean :15883
## 3rd Qu.: 479754 3rd Qu.:16248 3rd Qu.:21106
## Max. :1376737 Max. :36347 Max. :34328
## hbgt_plein_air hbgt_hotel hbgt_total sem_01
## Min. : 4383 Min. : 118 Min. : 8947 Min. : 41943
## 1st Qu.: 8886 1st Qu.: 2536 1st Qu.: 26512 1st Qu.: 97033
## Median : 18796 Median : 6912 Median : 60279 Median :121238
## Mean : 35435 Mean : 8377 Mean : 71826 Mean :230674
## 3rd Qu.: 35451 3rd Qu.:10231 3rd Qu.: 84641 3rd Qu.:280736
## Max. :125262 Max. :23963 Max. :216870 Max. :572653
## sem_02 sem_03 sem_04 sem_05
## Min. : 17393 Min. : 19463 Min. : 16806 Min. : 19079
## 1st Qu.: 47342 1st Qu.: 48995 1st Qu.: 47535 1st Qu.: 47770
## Median : 59410 Median : 63320 Median : 55459 Median : 56812
## Mean :123568 Mean :134552 Mean :120702 Mean :127313
## 3rd Qu.:159166 3rd Qu.:182171 3rd Qu.:145802 3rd Qu.:156956
## Max. :381328 Max. :420573 Max. :355608 Max. :366638
## sem_06 sem_07 sem_08 sem_09
## Min. : 18806 Min. : 25031 Min. : 36061 Min. : 37129
## 1st Qu.: 49486 1st Qu.: 69195 1st Qu.: 87477 1st Qu.: 82086
## Median : 67155 Median : 95625 Median :109423 Median : 94732
## Mean :149517 Mean :185570 Mean :208422 Mean :193770
## 3rd Qu.:192143 3rd Qu.:230398 3rd Qu.:245686 3rd Qu.:213628
## Max. :416787 Max. :480970 Max. :509484 Max. :495156
## sem_10 sem_11 sem_12 sem_13
## Min. : 33015 Min. : 27056 Min. : 30182 Min. : 44935
## 1st Qu.: 80766 1st Qu.: 59318 1st Qu.: 75743 1st Qu.:102701
## Median : 90601 Median : 70948 Median : 92362 Median :127688
## Mean :193513 Mean :146192 Mean :182607 Mean :268952
## 3rd Qu.:221660 3rd Qu.:173915 3rd Qu.:221875 3rd Qu.:371400
## Max. :507064 Max. :396748 Max. :512012 Max. :808824
## sem_14 sem_15 sem_16 sem_17
## Min. : 39857 Min. : 50817 Min. : 67829 Min. : 59662
## 1st Qu.: 92792 1st Qu.: 91117 1st Qu.:114064 1st Qu.: 89524
## Median :105131 Median :111756 Median :152124 Median :123062
## Mean :218537 Mean :229040 Mean :279906 Mean :251633
## 3rd Qu.:321637 3rd Qu.:337188 3rd Qu.:407006 3rd Qu.:403721
## Max. :707421 Max. :783785 Max. :937254 Max. :979203
## sem_18 sem_19 sem_20 sem_21
## Min. : 67512 Min. : 92999 Min. : 54932 Min. : 39612
## 1st Qu.:105638 1st Qu.: 138763 1st Qu.:102692 1st Qu.: 64735
## Median :162211 Median : 207948 Median :131935 Median : 94834
## Mean :272584 Mean : 349461 Mean :231538 Mean :162638
## 3rd Qu.:377847 3rd Qu.: 493359 3rd Qu.:325703 3rd Qu.:247816
## Max. :913211 Max. :1231192 Max. :716827 Max. :580704
## sem_22 sem_23 sem_24 sem_25
## Min. : 62978 Min. : 67249 Min. : 72553 Min. : 75505
## 1st Qu.:118656 1st Qu.:115684 1st Qu.: 119928 1st Qu.: 127594
## Median :136799 Median :139464 Median : 144888 Median : 169239
## Mean :270532 Mean :272986 Mean : 302724 Mean : 330518
## 3rd Qu.:388686 3rd Qu.:375733 3rd Qu.: 428489 3rd Qu.: 464569
## Max. :886621 Max. :878293 Max. :1025954 Max. :1121003
## sem_26 sem_27 sem_28 sem_29
## Min. : 84783 Min. : 115709 Min. : 152168 Min. : 155674
## 1st Qu.: 158299 1st Qu.: 180573 1st Qu.: 209592 1st Qu.: 223558
## Median : 200003 Median : 243396 Median : 319541 Median : 370825
## Mean : 378794 Mean : 443976 Mean : 552058 Mean : 611580
## 3rd Qu.: 513192 3rd Qu.: 576698 3rd Qu.: 708130 3rd Qu.: 722514
## Max. :1260598 Max. :1489779 Max. :1773125 Max. :2042483
## sem_30 sem_31 sem_32 sem_33
## Min. : 167292 Min. : 189349 Min. : 213599 Min. : 208458
## 1st Qu.: 241873 1st Qu.: 250514 1st Qu.: 261964 1st Qu.: 255989
## Median : 423421 Median : 409539 Median : 453454 Median : 471754
## Mean : 660041 Mean : 662997 Mean : 728025 Mean : 753688
## 3rd Qu.: 759909 3rd Qu.: 761730 3rd Qu.: 836694 3rd Qu.: 890552
## Max. :2164097 Max. :2244237 Max. :2562412 Max. :2599054
## sem_34 sem_35 sem_36 sem_37
## Min. : 131998 Min. : 71965 Min. : 87644 Min. : 75100
## 1st Qu.: 200760 1st Qu.: 130617 1st Qu.: 133379 1st Qu.: 122924
## Median : 350371 Median : 187677 Median : 180292 Median : 162607
## Mean : 613008 Mean : 374717 Mean : 381906 Mean : 373028
## 3rd Qu.: 718988 3rd Qu.: 472345 3rd Qu.: 531091 3rd Qu.: 529065
## Max. :2152288 Max. :1340889 Max. :1348689 Max. :1294211
## sem_38 sem_39 sem_40 sem_41
## Min. : 66420 Min. : 56896 Min. : 44813 Min. : 49687
## 1st Qu.: 114971 1st Qu.: 93996 1st Qu.: 90766 1st Qu.: 96659
## Median : 141010 Median :126151 Median :107758 Median :110672
## Mean : 327084 Mean :271135 Mean :245424 Mean :242459
## 3rd Qu.: 447897 3rd Qu.:371733 3rd Qu.:332395 3rd Qu.:354415
## Max. :1105771 Max. :872131 Max. :762814 Max. :721886
## sem_42 sem_43 sem_44 sem_45
## Min. : 56364 Min. : 74081 Min. : 78525 Min. : 32034
## 1st Qu.:100918 1st Qu.:118118 1st Qu.:122245 1st Qu.: 75427
## Median :135110 Median :159256 Median :156966 Median : 89083
## Mean :252033 Mean :283844 Mean :288582 Mean :189288
## 3rd Qu.:373916 3rd Qu.:430990 3rd Qu.:446706 3rd Qu.:276069
## Max. :810593 Max. :906239 Max. :890295 Max. :625306
## sem_46 sem_47 sem_48 sem_49
## Min. : 29174 Min. : 25124 Min. : 24002 Min. : 15871
## 1st Qu.: 64817 1st Qu.: 60271 1st Qu.: 53676 1st Qu.: 43952
## Median : 78385 Median : 73355 Median : 68237 Median : 57494
## Mean :170547 Mean :167230 Mean :154722 Mean :129402
## 3rd Qu.:243637 3rd Qu.:246953 3rd Qu.:236731 3rd Qu.:187164
## Max. :575045 Max. :552157 Max. :516119 Max. :396467
## sem_50 sem_51 sem_52 sem_53
## Min. : 18754 Min. : 37299 Min. : 74216 Min. : 12634
## 1st Qu.: 47147 1st Qu.: 89461 1st Qu.:160462 1st Qu.: 26681
## Median : 57458 Median :108732 Median :215260 Median : 29585
## Mean :133086 Mean :196421 Mean :359046 Mean : 61035
## 3rd Qu.:203592 3rd Qu.:288989 3rd Qu.:484494 3rd Qu.: 77267
## Max. :420665 Max. :564821 Max. :864340 Max. :153813
# TODO c'est une time-série, on fera un petit ridge-line plot: https://www.data-to-viz.com/graph/ridgeline.html
hebergement_ts <- force(capacites) %>%
select(dep,starts_with("sem_")) %>%
mutate(sem_53 = 7*sem_53, dep = as.factor(dep)) %>% # la derniere semaine ne fait qu'une journee, ça fait tâche sur les graphes
pivot_longer(-dep, names_to = "semaine", names_prefix = "sem_", values_to="touristes") %>%
mutate_at("semaine",as.numeric) %>%
mutate(date = ymd("2018-01-01")+(semaine-1)*7) %>%
as_tsibble(index=date, key=dep)
# on plot pour voir
ggplot(hebergement_ts)+
geom_line(aes(x=date,y=touristes/1e6, color=dep), size=1)+
scale_fill_continuous(guide = guide_legend()) +
theme_minimal()+
theme(legend.position="bottom") +
ggtitle("Capacité occupée par departement et par semaine") + ylab("Nombre de touristes (Millions) par semaine")
# le meme en ridge line plot
ggplot(hebergement_ts)+
geom_ridgeline( aes(x = date, height = touristes/1e6, y = fct_reorder(dep,touristes)), alpha=0.7) +
scale_fill_continuous(guide = guide_legend()) +
theme(legend.position="bottom") +
scale_x_date(expand = c(0,0)) +
ggtitle("Mais qui dors ou et quand ?", subtitle ="Occupation des hébergements touristiques par departement") + ylab("Nombre de touristes par semaine (Millions) ")
Un ordre s’impose parmi les départements pour rendre ça beau… Ici, c’est l’ordre imposé par le nombre de touristes qui s’applique. On voit une difference entre les departements à tourisme saisonnier et la haute-garonne
Si on veut developper le tourisme, il faut remplir les trous… On a le total de capacité alors on y va
hebergement_pcent_ts <- force(capacites) %>%
mutate(dpt = as.factor(dep)) %>%
group_by(dep) %>%
mutate_at(vars(starts_with("sem_")), ~./hbgt_total/7) %>%
select(dep,starts_with("sem_")) %>%
mutate(sem_53 = 7*sem_53) %>% # la derniere semaine ne fait qu'une journee, ça fait tache sur les graphes
pivot_longer(-dep, names_to = "semaine", names_prefix = "sem_", values_to="touristes") %>%
mutate_at("semaine",as.numeric) %>%
mutate(date = ymd("2018-01-01")+(semaine-1)*7) %>%
as_tsibble(index=date, key=dep)
Sont-ils bien tous des touristes ?
ggplot(hebergement_pcent_ts)+
geom_ridgeline( aes(x = date, height = touristes, y = fct_reorder(dep,touristes)), alpha=0.5, color="darkred", min_height = .5, scale=.5) +
geom_ridgeline( aes(x = date, height = 1, y = fct_reorder(dep,touristes)), alpha=0.01, color="black", scale=.5, size=0.1) +
scale_fill_continuous(guide = guide_legend()) +
theme(legend.position="bottom") +
scale_x_date(expand = c(0,0)) +
ggtitle("Quelle saison touristique dans chaque département ?", subtitle="Occupation des hébergements touristiques par departement, 100 % représenté par la fine ligne noire, on ne représente pas en dessous de 50%") + ylab("Nombre de voyageurs par semaine (pourcent) ")
Hormis dans l’Aveyron et les Hautes Py., la capacité d’hébergement touristique est toujours dépassée. On a donc capturé ici des voyageurs qui ne sont pas des touristes.
# skim(force(nuitees))# n'amène rien sur une time series au format long
summary(nuitees)
## date dpt_09 dpt_11 dpt_12
## Min. :2018-01-01 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.:2018-04-02 1st Qu.: 7192 1st Qu.: 22268 1st Qu.:11089
## Median :2018-07-02 Median :10169 Median : 36910 Median :17433
## Mean :2018-07-02 Mean :12810 Mean : 44248 Mean :21766
## 3rd Qu.:2018-10-01 3rd Qu.:14890 3rd Qu.: 53581 3rd Qu.:26522
## Max. :2018-12-31 Max. :38392 Max. :136945 Max. :71531
## dpt_30 dpt_31 dpt_32 dpt_34
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 32914 1st Qu.: 65346 1st Qu.: 8723 1st Qu.: 73504
## Median : 50704 Median : 76484 Median :13027 Median :116519
## Mean : 58577 Mean : 78391 Mean :15607 Mean :136345
## 3rd Qu.: 71167 3rd Qu.: 93139 3rd Qu.:17892 3rd Qu.:166126
## Max. :152483 Max. :161793 Max. :55238 Max. :401637
## dpt_46 dpt_48 dpt_65 dpt_66
## Min. : 0 Min. : 0 Min. : 0 Min. : 0
## 1st Qu.: 9775 1st Qu.: 4174 1st Qu.:16688 1st Qu.: 43922
## Median :16892 Median : 8263 Median :27593 Median : 65600
## Mean :21588 Mean : 9642 Mean :30036 Mean : 87119
## 3rd Qu.:25517 3rd Qu.:11919 3rd Qu.:38164 3rd Qu.:100942
## Max. :73458 Max. :34037 Max. :94459 Max. :283421
## dpt_81 dpt_82 total_occitanie
## Min. : 0 Min. : 0 Min. : 0
## 1st Qu.:11270 1st Qu.:11288 1st Qu.: 321768
## Median :15073 Median :15263 Median : 461712
## Mean :17363 Mean :16520 Mean : 550011
## 3rd Qu.:20689 3rd Qu.:19651 3rd Qu.: 656451
## Max. :44735 Max. :38910 Max. :1491112
# TODO c'est une time-série, on fera un petit ridge-line plot: https://www.data-to-viz.com/graph/ridgeline.html
nuitee_ts <- nuitees_td %>% as_tsibble(index= date, key=dep)
ggplot(nuitee_ts)+
geom_line(aes(x=date,y=`nuitees`, color=dep), size=1)+
scale_fill_continuous(guide = guide_legend()) +
theme_minimal()+
theme(legend.position="bottom")
# les NA dans les valeurs rends impossible la comparaison entre nuités et le fct_reorder bouhhh
median_na <- function(x) {
median(x,na.rm = TRUE)
}
# le meme en ridge line plot
ggplot(nuitee_ts)+
geom_ridgeline( aes(x = date, height = nuitees/1e5, y = fct_reorder(dep, `nuitees`, .fun=median_na)), alpha=0.5) +
scale_fill_continuous(guide = guide_legend()) +
theme(legend.position="bottom")+
ggtitle("nombre de nuitées (x100k)") +
scale_x_date(expand = c(0,0))
Il y a plusieurs accidents dans plein de départements simultanément. Là encore même distingo entre le 31 et les autres departements.
Il y a une saisonnalité à la semaine qu’il faut relier aux évènements extérieurs
Est-ce qu’on peut imaginer un effet whaou sur un bubble plot animé style le fameux Gapminder
# apprends-t-on plus avec un zoom ?
nuiteplus <- nuitees %>% na_if(0) %>%
select(-total_occitanie) %>%
timetk::tk_augment_timeseries_signature() %>%
select(date, starts_with("dpt_"),jour_sem=wday.lbl) %>%
xts(. , order.by = .$date)
dygraph(nuiteplus) %>% dyRangeSelector() %>%
dyHighlight(highlightCircleSize = 5,
highlightSeriesBackgroundAlpha = 0.2,
hideOnMouseOut = FALSE) %>%
dyLegend(width = 700, hideOnMouseOut = FALSE)
skim(par_origines)
| Name | par_origines |
| Number of rows | 493235 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| Date | 1 |
| numeric | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| org | 0 | 1 | 2 | 8 | 0 | 108 | 0 |
| dest | 0 | 1 | 2 | 2 | 0 | 13 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| date | 0 | 1 | 2018-01-01 | 2018-12-31 | 2018-07-05 | 365 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| volume | 0 | 1 | 407.01 | 1175.56 | 0 | 35 | 108 | 324 | 41517 | ▇▁▁▁▁ |
| vacances_org | 0 | 1 | 0.51 | 0.67 | 0 | 0 | 0 | 1 | 2 | ▇▁▅▁▁ |
| temp_midi | 0 | 1 | 17.12 | 7.94 | -7 | 11 | 17 | 24 | 36 | ▁▆▇▇▃ |
| meteo | 0 | 1 | 2.04 | 1.41 | 0 | 1 | 2 | 3 | 4 | ▇▆▇▇▇ |
| nb_evt | 0 | 1 | 0.44 | 0.75 | 0 | 0 | 0 | 1 | 6 | ▇▁▁▁▁ |
# il y a des duplicates. impossible de le faire rentrer dans ne time-series sans les enlever !
origines_ts <- par_origines_td %>%
ungroup %>%
mutate(dep_dest = fct_relevel(dep_dest, c("34","31","11","66","30","65","12","46","82","81","32","09","48"))) %>%
mutate_at("meteo",as.ordered) %>%
#group_by(date, dep_org, dep_dest) %>% summarise_all(~last(.)) %>% # filter duplicate
as_tsibble(index= date, key=c("dep_org","dep_dest"))
# un petit facet-plot pour la route
ggvolume <-ggplot(origines_ts %>% filter(dep_org!="Autres"))+
geom_line(aes(x=date,y=`volume`/1e3, color=dep_org), size=.4, alpha=.6)+
facet_wrap("dep_dest") +
scale_fill_continuous(guide = guide_legend()) + ylim(0,25) +
ggtitle("Mais d'où viennent-ils", subtitle = "Origine identifiée des voyageurs dans chaque departement visité")+
theme_minimal()+ theme(legend.position="none")+ scale_color_viridis_d(option="E")
ggplotly(ggvolume)